Now that we have a database made, we need to make a lookup table that tells us what the most likely population size is given that we've had n draws and m unique observations made. We will create a query for the n,m lookup and run it iteratively.


In [16]:
import sqlite3
import numpy as np

In [17]:
conn = sqlite3.connect('ICCP-database.sqlite')
c = conn.cursor()

In [20]:
outputdict = {}
for n in range(110):
    for m in range(n+1):
        c.execute('select count(draws.sim_num), sim.n from draws inner join sim on sim.sim_num=draws.sim_num where '\
                  'draws.draw_num={0} and uniques={1} group by n'.format(n+1,m+1))
        result = c.fetchall()
        c.execute('select count(sim_num), n from sim where max_draw<{0} and n>={1} group by n;'.format(n, m))
        result2 = c.fetchall()
        result = np.asarray(result)
        result2 = np.asarray(result2)
        if np.size(result)==0: result = np.reshape(result, (0,2))
        if np.size(result2)==0: result2 = np.reshape(result2, (0,2))
        result2 = np.vstack((result, result2))
        result = []
        for i in np.unique(result2[:,1]):
            total = sum(result2[result2[:,1]==i,0])
            result.append((i,total))
        result = np.asarray(result)
        if np.size(result)==0:
            outputdict[n+1,m+1]=(np.array([0]), np.array([0]), 0)
        else:
            max_n = result[result[:,1]== max(result[:,1]),0]
            likelihood = max(result[:,1])/sum(result[:,1])
            outputdict[n+1,m+1]=(max_n.flatten(), likelihood, max(result[:,1]))


---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-20-eb93f92b3b1b> in <module>()
      2 for n in range(110):
      3     for m in range(n+1):
----> 4         c.execute('select count(draws.sim_num), sim.n from draws inner join sim on sim.sim_num=draws.sim_num where '                  'draws.draw_num={0} and uniques={1} group by n'.format(n+1,m+1))
      5         result = c.fetchall()
      6         c.execute('select count(sim_num), n from sim where max_draw<{0} and n>={1} group by n;'.format(n, m))

KeyboardInterrupt: 

In [14]:
conn.commit()
conn.close()